# -*- coding:utf8 -*-
import jieba

# Full mode (cut_all=True).
seg_list = jieba.cut("我来到北京清华大学", cut_all=True)
print("Full Mode: " + "/ ".join(seg_list))

# Accurate mode, requested explicitly with cut_all=False.
seg_list = jieba.cut("我来到北京清华大学", cut_all=False)
print("Default Mode: " + "/ ".join(seg_list))

# cut_all is omitted here; accurate mode is the default.
seg_list = jieba.cut("他来到了网易杭研大厦")
print(", ".join(seg_list))

# Search-engine mode.
seg_list = jieba.cut_for_search("小明硕士毕业于中国科学院计算所，后在日本京都大学深造")
print(", ".join(seg_list))

# Search-engine mode again, this time on English text. The label used to read
# "Default Mode: ", copied from the accurate-mode example above, which
# mislabelled the cut_for_search output.
seg_list = jieba.cut_for_search("These Things Are Going to Cost You a Lot More in 2018")
print("Search Mode: " + ",".join(seg_list))


def cut_words(sentence, mode='/'):
    """
    Segment ``sentence`` with jieba and return the tokens as one string.

    ``jieba.cut`` is called without ``cut_all``, i.e. in its default
    (accurate) mode.

    :param sentence: text to segment.
    :param mode: separator inserted between tokens. Despite its name it
        does not select a segmentation mode. Defaults to ``'/'``.
    :return: the tokens joined by ``mode``.
    """
    return mode.join(jieba.cut(sentence))


from db_manager import DBSession
from deal_word import settings
from deal_word.utils import help
# Read every row returned by settings.query_all_info, segment its title and
# content with cut_words, and insert the segmented text through
# settings.insert_into_info_word.
#
# NOTE(review): the INSERT statement is assembled with %-interpolation, so the
# only protection against malformed or injected SQL is the quote stripping
# below. If DBSession.execute_insert can take bound parameters, prefer that;
# its API is not visible from this file.
session = DBSession(settings.db_config)
result = session.execute_query(settings.query_all_info)
for item in result:
    # Named info_id rather than id so the builtin id() is not shadowed.
    info_id = item["id"]
    pub_time = str(item["pub_time"])
    # Single quotes are dropped from the text fields before they reach the
    # SQL string (presumably the template wraps values in single quotes).
    title = item["title"].replace("'", '')
    content = item["content"].replace("'", '')
    company = item["company"]
    # company is interpolated into the same statement, so give it the same
    # treatment; the guard leaves None / non-text values untouched.
    if hasattr(company, 'replace'):
        company = company.replace("'", '')
    cut_content = cut_words(title) + '/' + cut_words(content)
    # str.replace returns a new string; the result was previously discarded.
    cut_content = cut_content.replace("'", '')
    create_date = help.get_current_time()
    insert_sql = settings.insert_into_info_word % (
        str(info_id), pub_time, cut_content, create_date, company)
    # Removes every newline from the final statement, including any that
    # came from the interpolated values.
    insert_sql = insert_sql.replace('\n', '')
    session.execute_insert(insert_sql)
    # Parenthesised single-argument form prints the same text on Python 2
    # and is valid syntax on Python 3, matching the print calls above.
    print("Insert (%s) success.." % title)

